\contentsline {section}{\numberline {1}引言}{2}{section.1}%
\contentsline {subsection}{\numberline {1.1}研究背景与意义}{2}{subsection.1.1}%
\contentsline {subsection}{\numberline {1.2}任务描述}{2}{subsection.1.2}%
\contentsline {section}{\numberline {2}设计思路}{2}{section.2}%
\contentsline {subsection}{\numberline {2.1}整体架构设计}{2}{subsection.2.1}%
\contentsline {subsection}{\numberline {2.2}类结构设计}{3}{subsection.2.2}%
\contentsline {subsubsection}{\numberline {2.2.1}Matrix类}{3}{subsubsection.2.2.1}%
\contentsline {subsubsection}{\numberline {2.2.2}Layer接口与实现类}{3}{subsubsection.2.2.2}%
\contentsline {subsubsection}{\numberline {2.2.3}MLP类}{3}{subsubsection.2.2.3}%
\contentsline {section}{\numberline {3}实现细节}{3}{section.3}%
\contentsline {subsection}{\numberline {3.1}矩阵乘法优化}{3}{subsection.3.1}%
\contentsline {subsubsection}{\numberline {3.1.1}分块计算策略}{4}{subsubsection.3.1.1}%
\contentsline {subsubsection}{\numberline {3.1.2}线程组织优化}{4}{subsubsection.3.1.2}%
\contentsline {subsection}{\numberline {3.2}内存管理优化}{4}{subsection.3.2}%
\contentsline {subsection}{\numberline {3.3}批处理实现}{4}{subsection.3.3}%
\contentsline {section}{\numberline {4}性能测试与分析}{5}{section.4}%
\contentsline {subsection}{\numberline {4.1}测试环境}{5}{subsection.4.1}%
\contentsline {subsection}{\numberline {4.2}测试方法}{5}{subsection.4.2}%
\contentsline {subsection}{\numberline {4.3}测试结果}{5}{subsection.4.3}%
\contentsline {subsubsection}{\numberline {4.3.1}基准性能测试}{5}{subsubsection.4.3.1}%
\contentsline {subsubsection}{\numberline {4.3.2}批次大小对性能的影响}{5}{subsubsection.4.3.2}%
\contentsline {subsubsection}{\numberline {4.3.3}隐藏层维度对性能的影响}{5}{subsubsection.4.3.3}%
\contentsline {subsection}{\numberline {4.4}性能分析与讨论}{5}{subsection.4.4}%
\contentsline {section}{\numberline {5}结论与展望}{5}{section.5}%
\contentsline {subsection}{\numberline {5.1}主要成果}{5}{subsection.5.1}%
\contentsline {subsection}{\numberline {5.2}未来改进方向}{6}{subsection.5.2}%
\contentsline {section}{\numberline {6}参考文献}{6}{section.6}%
\contentsline {section}{\numberline {A}核心代码}{6}{appendix.A}%
\contentsline {subsection}{\numberline {A.1}矩阵乘法核函数}{6}{subsection.A.1}%
\contentsline {subsection}{\numberline {A.2}ReLU激活函数}{6}{subsection.A.2}%
\contentsline {subsection}{\numberline {A.3}前向传播实现}{7}{subsection.A.3}%
